In [56]:
import numpy as np
from numpy.random import poisson
import matplotlib.pyplot as plt
from time import time
import numpy.linalg as LA
from scipy import stats
%matplotlib inline

In [3]:
def simARPoisson(T, a, mu0):
    """Simulate an AR(2) Poisson process with a log-link mean."""
    p = 2                             # model order
    c = np.zeros(T + p)               # counts, including p initial values
    mu = np.zeros(T + p + 1)          # means, one step ahead of the counts
    mu[:p + 1] = mu0                  # initial mean
    c[:p] = poisson(mu0, size=p)      # initial counts drawn from Poisson(mu0)
    a0, a1, a2 = a
    for t in range(p, T + p):
        c[t] = poisson(mu[t])                    # draw the count at time t
        ar = a0 + a1 * c[t] + a2 * c[t-1]        # autoregression on the counts
        mu[t + 1] = np.exp(ar)                   # log link: next-step mean
    return c[p:], mu[p:]

def set_data(p, x):
    """Build the lagged design matrix X_p and the target vector x_T."""
    temp = x.flatten()
    n = len(temp[p:])                      # number of usable observations
    x_T = temp[p:].reshape((n, 1))         # targets: x_t for t = p, ..., T-1
    X_p = np.ones((n, p + 1))              # first column of ones (intercept)
    for i in range(1, p + 1):
        X_p[:, i] = temp[i - 1: i - 1 + n] # column i holds lag p - i + 1
    return X_p, x_T

Task 1. AR Poisson process.

1.1

We simulate the Poisson process with the given parameters and plot both the counts and the mean at each time step.


In [68]:
np.random.seed(19)
T = 1000
a = np.array([.2, -.1, .1])
mu0 = .5
c, mu = simARPoisson(T, a, mu0)
plt.plot(c, '.', label='Counts')
plt.plot(mu, label='Mean')
plt.legend()
plt.xlabel('time')
plt.ylabel('counts')


Out[68]:
<matplotlib.text.Text at 0x7f800ed47748>

We can see from the plot above that the mean stays roughly constant over the whole process, apart from a noise component. The mean does of course depend on the previous counts, but as an approximation we can test whether the count data is Poisson distributed with rate equal to the average of the means over time. The empirical distribution turns out to be in good agreement with this hypothesis. We do not carry out a quantitative analysis here, but it is reassuring to see that the approximation is not far off.


In [71]:
import collections
counter = collections.Counter(c)
print(counter)
# empirical frequencies of the counts, normalized to probabilities
vals = np.array(list(counter.values()), dtype=float)
plt.bar(list(counter.keys()), vals / vals.sum(), label='data')
# Poisson pmf with rate equal to the time average of the means
plt.plot(np.arange(10), stats.poisson.pmf(np.arange(10), np.mean(mu)), '*r', label='approx')
plt.xlabel('counts')
plt.ylabel('p')
plt.legend()


Counter({1.0: 364, 0.0: 288, 2.0: 231, 3.0: 81, 4.0: 30, 5.0: 3, 6.0: 2, 9.0: 1})
Out[71]:
<matplotlib.legend.Legend at 0x7f800f0fd940>

1.2

Computing the log-likelihood for a grid of a1 values, we see that the maximum lies approximately at the true value of a1; the deviation from it is due to the noise in the data.
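
For reference, up to the additive constant $-\log(c_t!)$ that does not depend on the parameters, the log-likelihood being scanned in the next cell is

$$\mathcal{L}(a_1) = \sum_t \left( c_t \log\mu_t - \mu_t \right), \qquad \log\mu_t = a_0 + a_1 c_{t-1} + a_2 c_{t-2},$$

which is exactly what `loss` accumulates below.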


In [70]:
a0, a1, a2 = a

# scan the log-likelihood over a grid of a1 values
x = np.arange(-.6, .4, .001)
loss = np.zeros(len(x))
for i, a1 in enumerate(x):
    logmu = a0 + a1 * c[1:-1] + a2 * c[:-2]
    loss[i] = np.sum(c[2:] * logmu - np.exp(logmu))
plt.plot(x, loss)
plt.plot(x[loss == max(loss)], loss[loss == max(loss)], '*')  # mark the maximum
plt.grid()
x[loss == max(loss)]


Out[70]:
array([-0.095])

1.3

To find the parameter that maximizes the log-likelihood we run a gradient ascent. The result is quite close to the true value of -0.1, even if not exactly equal to it.
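
Differentiating the log-likelihood with respect to $a_1$ gives the ascent direction used in the update below:

$$\frac{\partial \mathcal{L}}{\partial a_1} = \sum_t c_{t-1} \left( c_t - e^{\log\mu_t} \right),$$

and each iteration takes a step of size `gamma` along it.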


In [78]:
gamma = 0.000005      # learning rate
itera = 500           # number of iterations
a1 = 0.4              # starting point, far from the true value
a1s = np.zeros(itera)
loss = np.zeros(itera)
for i in range(itera):
    logmu = a0 + a1 * c[1:-1] + a2 * c[:-2]
    loss[i] = np.sum(c[2:] * logmu - np.exp(logmu))                    # track the log-likelihood
    a1 += gamma * np.sum(c[2:] * c[1:-1] - np.exp(logmu) * c[1:-1])    # gradient step
    a1s[i] = a1
print(a1)


-0.0944928907766

In [77]:
plt.plot(loss)
plt.xlabel('iteration')
plt.ylabel('Log Likelihood')
plt.figure()
plt.plot(a1s)
plt.xlabel('iteration')
plt.ylabel('a1')


Out[77]:
<matplotlib.text.Text at 0x7f800e849908>
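
As a cross-check (not part of the derivation above), the same maximum can be found with SciPy's scalar optimizer. A minimal sketch, assuming the scalars `a0` and `a2` unpacked earlier still hold the true values:

from scipy.optimize import minimize_scalar

def neg_ll(a1):
    # negative Poisson log-likelihood as a function of a1 alone
    logmu = a0 + a1 * c[1:-1] + a2 * c[:-2]
    return -np.sum(c[2:] * logmu - np.exp(logmu))

minimize_scalar(neg_ll, bounds=(-1, 1), method='bounded').x

This minimizes the negative log-likelihood over a bounded interval and should land near the gradient-ascent estimate.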

Task 2. Granger Causality.

2.1
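
We simulate a bivariate VAR(2) process with intercept a0, coefficient matrices A1 and A2, and i.i.d. Gaussian noise. Note the structure of A1 and A2: their lower-left entries are zero, so the past of x_1 never enters the dynamics of x_2, while the nonzero upper-right entries make x_1 depend on the past of x_2.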


In [91]:
T = 1000
np.random.seed(1)
a0 = np.array([[0], [0]])                # intercept
A1 = np.array([[.2, -.2], [0., .1]])     # lag-1 coefficient matrix
A2 = np.array([[.1, -.1], [0., .1]])     # lag-2 coefficient matrix
Sigma = np.eye(2) * 0.01                 # (defined but unused: the noise below is drawn with std 0.01 directly)
x = np.zeros((2, T + 2))
for t in range(2, T + 2):
    x1 = np.dot(A1, x[:, [t - 1]])       # lag-1 contribution
    x2 = np.dot(A2, x[:, [t - 2]])       # lag-2 contribution
    x[:, [t]] = a0 + x1 + x2 + np.random.normal(0, 0.01, size=(2, 1))
x = x[:, 2:]                             # drop the two initial zero columns

In [92]:
plt.plot(x[0, :], label='x_1')
plt.plot(x[1, :], label='x_2')
plt.legend()
plt.xlabel('time')
x.T.shape


Out[92]:
(1000, 2)

We now ask whether x_2 Granger-causes x_1. To that end we first fit an AR(2) model of x_1 on its own past (the restricted model) by least squares and compute its log-likelihood, then repeat the fit with the lags of x_2 added to the design matrix (the full model), and compare the two with a likelihood-ratio test.


In [31]:
p = 2
# restricted model: regress x_1 on its own p lags
X_p, x_1 = set_data(p, x[0, :])
XpXp = np.dot(X_p.T, X_p)
A = np.dot(LA.inv(XpXp), np.dot(X_p.T, x_1))   # least-squares coefficients
A


Out[31]:
array([[  4.43866746e-05],
       [  1.02493031e-01],
       [  2.30119359e-01]])
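
The Gaussian log-likelihood of a fitted model, with $S$ the maximum-likelihood estimate of the residual variance and $R = x_T - X_p A$ the residuals, is

$$\mathcal{L} = -\frac{T-p}{2}\log(2\pi) - \frac{T-p}{2}\log\det S - \frac{1}{2}\operatorname{tr}\left(R S^{-1} R^\top\right),$$

which the next cell evaluates for the restricted model.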

In [7]:
res = x_1 - np.dot(X_p, A)            # residuals of the restricted model
S = np.dot(res.T, res) / (T - p)      # ML estimate of the residual variance
LL1 = -(T - p) / 2 * np.log(2 * np.pi) - (T - p) / 2 * \
    np.log(LA.det(S)) - 1 / 2 * \
    np.trace(np.dot(np.dot(res, LA.inv(S)), res.T))
LL1

In [33]:
p = 2
# full model: append the p lags of x_2 to the design matrix from the cell above
X_foo, _ = set_data(p, x[1, :])
X_p = np.hstack((X_p, X_foo[:, 1:]))
XpXp = np.dot(X_p.T, X_p)
A = np.dot(LA.inv(XpXp), np.dot(X_p.T, x_1))
A


Out[33]:
array([[  5.07065016e-05],
       [  1.03905299e-01],
       [  2.04363239e-01],
       [  3.47540885e-01],
       [ -3.20000000e+01],
       [ -5.00000000e-01],
       [  3.20000000e+01]])

In [18]:
res = x_1 - np.dot(X_p, A)            # residuals of the full model
S = np.dot(res.T, res) / (T - p)
LL2 = -(T - p) / 2 * np.log(2 * np.pi) - (T - p) / 2 * \
    np.log(LA.det(S)) - 1 / 2 * \
    np.trace(np.dot(np.dot(res, LA.inv(S)), res.T))
LL2


Out[18]:
3173.8112865385751
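
Under the null hypothesis that the added lags of x_2 have zero coefficients, the likelihood-ratio statistic 2(LL2 - LL1) is asymptotically chi-square distributed with 2 degrees of freedom (one per restriction), which gives the p-value below.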

In [19]:
from scipy.stats import chi2
com = 2 * (LL2 - LL1)    # likelihood-ratio statistic
chi2.sf(com, 2)          # p-value: chi-square survival function, 2 degrees of freedom


Out[19]:
1.6345092643614563e-10
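
The p-value is essentially zero, so we reject the null hypothesis: the past of x_2 carries information about x_1 beyond x_1's own past, i.e. x_2 Granger-causes x_1, exactly as the nonzero upper-right entries of A1 and A2 suggest.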


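We now repeat the procedure in the opposite direction and test whether x_1 Granger-causes x_2.
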
In [38]:
p = 2
# restricted model: regress x_2 on its own p lags
X_p, x_1 = set_data(p, x[1, :])
XpXp = np.dot(X_p.T, X_p)
A = np.dot(LA.inv(XpXp), np.dot(X_p.T, x_1))
A


Out[38]:
array([[  1.54451362e-05],
       [  1.19323751e-01],
       [  9.33730035e-02]])

In [39]:
res = x_1 - np.dot(X_p, A)
S = np.dot(res.T, res) / (T - p)
LL1 = -(T - p) / 2 * np.log(2 * np.pi) - (T - p) / 2 * \
    np.log(LA.det(S)) - 1 / 2 * \
    np.trace(np.dot(np.dot(res, LA.inv(S)), res.T))
LL1


Out[39]:
31911.385173215102

In [40]:
p = 2
# full model: append the p lags of x_1 to the design matrix above
X_foo, _ = set_data(p, x[0, :])
X_p = np.hstack((X_p, X_foo[:, 1:]))
XpXp = np.dot(X_p.T, X_p)
A = np.dot(LA.inv(XpXp), np.dot(X_p.T, x_1))
A


Out[40]:
array([[  1.19457041e-05],
       [  1.21178215e-01],
       [  9.37231185e-02],
       [  8.44789968e-03],
       [  5.86174709e-03]])

In [41]:
res = x_1 - np.dot(X_p, A)
S = np.dot(res.T, res) / (T - p)
LL2 = -(T - p) / 2 * np.log(2 * np.pi) - (T - p) / 2 * \
    np.log(LA.det(S)) - 1 / 2 * \
    np.trace(np.dot(np.dot(res, LA.inv(S)), res.T))
LL2


Out[41]:
31912.134672992477

In [42]:
com = 2 * (LL2 - LL1)    # likelihood-ratio statistic
chi2.sf(com, 2)


Out[42]:
0.47260290028596996
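
This time the p-value is large, so we cannot reject the null hypothesis: the past of x_1 adds nothing to the prediction of x_2, i.e. x_1 does not Granger-cause x_2, consistent with the zero lower-left entries of A1 and A2.

The whole test can be packaged into a reusable helper. A minimal sketch building on the set_data function and the imports above (the name granger_p is our own):

def granger_p(x_target, x_other, T, p=2):
    """Likelihood-ratio Granger test: do the lags of x_other help predict x_target?"""
    def gauss_ll(X, y):
        # least-squares fit and Gaussian log-likelihood, as in the cells above
        A = np.dot(LA.inv(np.dot(X.T, X)), np.dot(X.T, y))
        res = y - np.dot(X, A)
        S = np.dot(res.T, res) / (T - p)
        return (-(T - p) / 2 * np.log(2 * np.pi)
                - (T - p) / 2 * np.log(LA.det(S))
                - 1 / 2 * np.trace(np.dot(np.dot(res, LA.inv(S)), res.T)))
    X_r, y = set_data(p, x_target)          # restricted model: own lags only
    X_o, _ = set_data(p, x_other)
    X_f = np.hstack((X_r, X_o[:, 1:]))      # full model: add the other series' lags
    com = 2 * (gauss_ll(X_f, y) - gauss_ll(X_r, y))
    return chi2.sf(com, p)                  # p restrictions -> p degrees of freedom

granger_p(x[0, :], x[1, :], T) and granger_p(x[1, :], x[0, :], T) then reproduce the two tests above.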
